First we can look at the concentrations in each zip code and day.
Next we can look at the cases in each zip code and day.
Next we’ll classify each day-zip combination as experiencing some wildfire exposure or not. Wildfire period is defined as any day where the zip code has a PM2.5 value greater than zero (note: some are very low). See how the WF period(s) is/are more clearly the second plot which uses a threshold of 1, instead of 0.
Now we can begin to calculate RR, first for the entire zip codes (not race stratified). We’ll use a constant of o.1 to fill in spots where there are no cases during the exposure or control periods.
constant <- 0.1
rates_overall <- foo %>%
mutate(total = white + hispanic + black + native_am + asian_pi + other + missing) %>%
group_by(zip, wildfire) %>%
summarise(cases = sum(total),
days = length(svc_from_dt)) %>%
mutate(casesPerDay = cases/days)
RR_overall <- foo %>%
mutate(total = white + hispanic + black + native_am + asian_pi + other + missing) %>%
group_by(zip, wildfire) %>%
summarise(cases = sum(total),
days = length(svc_from_dt)) %>%
mutate(cases = ifelse(cases == 0, constant, cases),
casesPerDay = cases/days) %>%
select(zip, wildfire, casesPerDay) %>%
spread(key = wildfire, value = casesPerDay) %>%
mutate(RateRatio = WFperiod/nonWFperiod,
ln_RateRatio = log(RateRatio)) %>%
rename(WF_rate = WFperiod,
nonWF_rate = nonWFperiod)
DT::datatable({rates_overall}) %>% DT::formatRound(5,5)
DT::datatable({RR_overall}) %>% DT::formatRound(c(2:5),3)
Plot Overall rate ratios
## Calculating race-specific RRNow we can begin to calculate race-specific RR.
cases <- foo %>%
group_by(zip, wildfire) %>%
summarise(white = sum(white),
hispanic = sum(hispanic),
black = sum(black),
native_am = sum(native_am),
asian_pi = sum(asian_pi),
other = sum(other),
days = length(svc_from_dt)) %>%
gather(white, hispanic, black, native_am, asian_pi,other, key = race, value = cases) %>%
select(-days) %>%
spread(key = wildfire, value = cases) %>%
mutate(remove = ifelse(WFperiod == 0 & nonWFperiod == 0, "remove","keep")) %>%
rename(WF_cases = WFperiod,
noWF_cases = nonWFperiod)
rates <- foo %>%
group_by(zip, wildfire) %>%
summarise(white = sum(white),
hispanic = sum(hispanic),
black = sum(black),
native_am = sum(native_am),
asian_pi = sum(asian_pi),
other = sum(other),
days = length(svc_from_dt)) %>%
gather(white, hispanic, black, native_am, asian_pi,other, key = race, value = cases) %>%
mutate(casesPerDay = ifelse(cases == 0, constant/days, cases/days)) %>%
select(-days, -cases) %>%
spread(key = wildfire, value = casesPerDay) %>%
rename(WF_rate = WFperiod,
noWF_rate = nonWFperiod)
RR_race <- merge(cases, rates) %>%
mutate(RateRatio = WF_rate/noWF_rate,
ln_RateRatio = log(WF_rate/noWF_rate))
DT::datatable({RR_race}) %>% DT::formatRound(c(3:6),3)
Look at the distrubutions of race_specific rate ratios, need to look into why so many of these are focused around 1.5…?
RR_race %>%
ggplot() + geom_boxplot(aes(x= race, y= ln_RateRatio))+ geom_jitter(aes(x= race, y= ln_RateRatio, color=race), alpha=0.2) + guides(color=FALSE)
There is a big difference in the RRs after removing the data.
RR_race %>%
filter(remove == "keep") %>%
ggplot() + geom_boxplot(aes(x= race, y= ln_RateRatio))+ geom_jitter(aes(x= race, y= ln_RateRatio, color=race), alpha=0.2) + guides(color=FALSE)
Two parameters:
need to be examined to show how they may change the results.
rm(booHoo)
constants <- c(0.01, 0.05,0.1,0.2,0.3,0.4, 0.5)
thresholds <- c(0:45)
for(constant in constants){
for(threshold in thresholds){
boo <- read.dta("~/Wildfires_kurt/race eth of asthma ED visits by zip day_exp merged.dta") %>%
mutate(wildfire = ifelse(pm_25 > threshold, "WFperiod", "nonWFperiod"),
total = white + hispanic + black + native_am + asian_pi + other + missing) %>%
rename(zip = bene_addr_zip) %>%
group_by(zip, wildfire) %>%
summarise(cases = sum(total),
days = length(svc_from_dt)) %>%
mutate(cases = ifelse(cases == 0, constant, cases),
casesPerDay = cases/days) %>%
select(zip, wildfire, casesPerDay) %>%
spread(key = wildfire, value = casesPerDay) %>%
mutate(RateRatio = WFperiod/nonWFperiod,
ln_RateRatio = log(WFperiod/nonWFperiod),
const = constant,
thresh = threshold)
if(exists("booHoo")) {
booHoo <- bind_rows(booHoo, boo)
}
else {
booHoo <- boo
}
}
}
booHoo %>%
mutate(county = "San Diego") %>%
ggplot() + geom_boxplot(aes(x=county, y= ln_RateRatio))+ geom_jitter(aes(x=county, y= ln_RateRatio), alpha=0.2) + guides(color=FALSE) + facet_grid(const~thresh)
booHoo %>%
group_by(const, thresh)%>%
summarise(mean_LnRR = mean(ln_RateRatio, na.rm=T),
var_LnRR = var(ln_RateRatio, na.rm=T),
min_LnRR = min(ln_RateRatio, na.rm=T),
max_LnRR = max(ln_RateRatio, na.rm=T)) %>%
gather(min_LnRR,mean_LnRR,max_LnRR,var_LnRR, key = statistic, value = value) %>%
filter(statistic == "mean_LnRR") %>%
ggplot() + geom_tile(aes(x=factor(thresh), y=factor(const), fill= value)) +
facet_grid(statistic~.) +
scale_fill_gradient(low = "red", high = "green") +
ylab("value of the constant usd to replace no cases") +
xlab("value above which we define the wildfire period")
booHoo %>%
ggplot() +
geom_smooth(aes(x=thresh, y= ln_RateRatio, group=const, color=factor(const)), method = "auto")
Combine with other covariates, calculate the weights and